{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "939d3bb5-c006-4943-90a9-bdfae7b07512",
   "metadata": {},
   "source": [
    "## 【实操】Milvus的基本功能"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1ec3af7b-bbba-4030-adac-3653dbba28d7",
   "metadata": {},
   "source": [
    "### 建表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7e565ae8-bcd7-4983-b26f-3f131abbbee6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 在pymilvus 中，指定一个本地文件名作为 MilvusClient 的 uri 参数将使用 Milvus Lite\n",
    "# from pymilvus import MilvusClient\n",
    "# client = MilvusClient(\"./milvus_demo.db\")\n",
    "# 远程的也同理client = MilvusClient(uri=\"http://localhost:19530\", token=\"username:password\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e03a17ec-2878-4bc0-8ad3-4341da3d70a9",
   "metadata": {},
   "source": [
    "### 演示attu工具的使用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e44b93b8-f47b-4202-856b-9dfe8d3afa41",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "v2.3.5-lite\n"
     ]
    }
   ],
   "source": [
    "# 将 Milvus Lite 作为 Python 模块启动\n",
    "# 演示attu的界面\n",
    "\n",
    "from milvus import default_server\n",
    "from pymilvus import connections, utility\n",
    "\n",
    "# Start your milvus server\n",
    "default_server.start()\n",
    "\n",
    "# Now you can connect with localhost and the given port\n",
    "# Port is defined by default_server.listen_port\n",
    "connections.connect(host='127.0.0.1', port=default_server.listen_port)\n",
    "\n",
    "# Check if the server is ready.\n",
    "print(utility.get_server_version())\n",
    "\n",
    "# Stop your milvus server\n",
    "# default_server.stop()\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ceb6af8a-9984-4422-b000-7bb0b9b99ac9",
   "metadata": {},
   "source": [
    "### 创建collection并插入数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c1c1fcac-55b6-45c7-8a71-d56856b304b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# # 以下是如何使用 Milvus Lite 进行文本搜索的简单演示\n",
    "from pymilvus import MilvusClient\n",
    "\n",
    "# # client = MilvusClient(\"./milvus_demo.db\")\n",
    "client = MilvusClient(uri=\"http://127.0.0.1:19530\")\n",
    "client.create_collection(\n",
    "    collection_name=\"demo_collection\",\n",
    "    dimension=384 \n",
    ")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9c925b0b-96db-4e57-9ae2-1b98db555943",
   "metadata": {},
   "source": [
    "### 插入示例数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "dcf623b4-21ec-41db-ab75-ac81ed5b8f6b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "docs = [\n",
    "    \"Artificial intelligence was founded as an academic discipline in 1956.\",\n",
    "    \"Alan Turing was the first person to conduct substantial research in AI.\",\n",
    "]\n",
    "\n",
    "vectors = [[ np.random.uniform(-1, 1) for _ in range(384) ] for _ in range(len(docs)) ]\n",
    "data = [ {\"id\": i, \"vector\": vectors[i], \"text\": docs[i], \"subject\": \"history\"} for i in range(len(vectors)) ]\n",
    "res = client.insert(\n",
    "    collection_name=\"demo_collection\",\n",
    "    data=data\n",
    ")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5bee70cd-fb55-4de9-936f-2fe248df7b24",
   "metadata": {},
   "source": [
    "### 查询数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "db4aad96-646c-4142-81d7-f90eb26bd487",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data: [[{'id': 0, 'distance': 1.0, 'entity': {'text': 'Artificial intelligence was founded as an academic discipline in 1956.', 'subject': 'history'}}, {'id': 1, 'distance': 0.000612135510891676, 'entity': {'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history'}}]]\n"
     ]
    }
   ],
   "source": [
    "res = client.search(\n",
    "    collection_name=\"demo_collection\",\n",
    "    data=[vectors[0]],\n",
    "    filter=\"subject == 'history'\",\n",
    "    limit=2,\n",
    "    output_fields=[\"text\", \"subject\"],\n",
    ")\n",
    "print(res)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6236b5b0-40bd-4493-ae25-60c27d82dbe2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data: [\"{'text': 'Artificial intelligence was founded as an academic discipline in 1956.', 'subject': 'history', 'id': 0}\", \"{'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history', 'id': 1}\"]\n"
     ]
    }
   ],
   "source": [
    "res = client.query(\n",
    "    collection_name=\"demo_collection\",\n",
    "    filter=\"subject == 'history'\",\n",
    "    output_fields=[\"text\", \"subject\"],\n",
    ")\n",
    "print(res)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "66e997a7-8f93-4766-9227-49b37b7e58ec",
   "metadata": {},
   "source": [
    "### 其他一些常用功能"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e875ea1e-c34e-4720-8ad0-21256dd69b56",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collection schema: {'collection_name': 'demo_collection', 'auto_id': False, 'num_shards': 1, 'description': '', 'fields': [{'field_id': 100, 'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'params': {}, 'is_primary': True}, {'field_id': 101, 'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 384}}], 'functions': [], 'aliases': [], 'collection_id': 458653500473081913, 'consistency_level': 2, 'properties': {}, 'num_partitions': 1, 'enable_dynamic_field': True, 'created_timestamp': 458653503242371075}\n"
     ]
    }
   ],
   "source": [
    "# 查看collection信息\n",
    "schema = client.describe_collection(collection_name=\"demo_collection\")\n",
    "print(\"Collection schema:\", schema)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "4bc36c9d-9b78-4446-af39-bef7ad9a27d9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collections: ['demo_collection']\n"
     ]
    }
   ],
   "source": [
    "# 列出所有集合\n",
    "collections = client.list_collections()\n",
    "print(\"Collections:\", collections)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "09fec577-3c98-4fdd-a971-1fb6d4b4bdb1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'upsert_count': 3, 'cost': 0}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 更新数据\n",
    "# 插入或更新数据，如果 ID 已存在则更新，否则插入\n",
    "docs = [\n",
    "    \"Alan Turing was the first person to conduct substantial research in AI.\",\n",
    "    \"Artificial intelligence was founded as an academic discipline in 1956.\",\n",
    "    \"Born in Maida Vale, London, Turing was raised in southern England.\",\n",
    "]\n",
    "\n",
    "vectors = [[ np.random.uniform(-1, 1) for _ in range(384) ] for _ in range(len(docs)) ]\n",
    "data = [ {\"id\": i, \"vector\": vectors[i], \"text\": docs[i], \"subject\": \"history\"} for i in range(len(vectors)) ]\n",
    "client.upsert(\n",
    "    collection_name=\"demo_collection\",\n",
    "    data=data\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6fe9245a-4cb9-4a5a-9863-c9fe42839a29",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'delete_count': 3}\n"
     ]
    }
   ],
   "source": [
    "#删除数据\n",
    "res = client.delete(\n",
    "    collection_name=\"demo_collection\",\n",
    "    filter=\"subject == 'history'\",\n",
    ")\n",
    "print(res)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "8c0af903-7cda-4102-ba7b-094ef8aedae6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 删除集合\n",
    "client.drop_collection(collection_name=\"demo_collection\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "d3d3d2cb-77e0-42db-a31e-ba526188cf54",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stop your milvus server\n",
    "default_server.stop()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c698f959-cdb4-4b4c-bde0-0bc438a2ef94",
   "metadata": {},
   "source": [
    "### 索引的操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c28158bd-f5bb-46ac-a6d7-db9c9dc20339",
   "metadata": {},
   "outputs": [],
   "source": [
    "## 创建集合\n",
    "fields = [ FieldSchema(name=\"id\", dtype=DataType.INT64, is_primary=True, auto_id=False), \n",
    "           FieldSchema(name=\"vector\", dtype=DataType.FLOAT_VECTOR, dim=128) ]\n",
    "client.create_collection(\n",
    "    collection_name=\"example_collection\",\n",
    "    schema= CollectionSchema(fields=fields, enable_dynamic_field=True),\n",
    "    metric_type=\"L2\",  # 欧几里得距离\n",
    ")\n",
    "\n",
    "# 创建 HNSW 索引\n",
    "index_params={\n",
    "        \"field_name\":\"vector\",\n",
    "        \"index_type\": \"HNSW\",\n",
    "        \"metric_type\": \"L2\",\n",
    "        \"index_name\":\"vector_index\",\n",
    "        \"params\": {\"M\": 16, \"efConstruction\": 200}\n",
    "    }\n",
    "client.create_index(\n",
    "    collection_name=\"example_collection\",\n",
    "    index_params = IndexParams(**index_params)\n",
    ")\n",
    "\n",
    "# 查看索引信息\n",
    "index_info = client.describe_index(collection_name=\"example_collection\", index_name = \"vector_index\")\n",
    "print(\"Index info:\", index_info)\n",
    "\n",
    "# 删除索引\n",
    "# client.drop_index(collection_name=\"example_collection\", index_name = \"vector_index\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eee81a4f-0045-4ffc-a167-c86cf4a6e2cd",
   "metadata": {},
   "source": [
    "### 其他一些数据库管理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1487c05-bc95-4d8a-9d4f-1c2d130e16f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 分区\n",
    "# 权限管理"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2969c143-2de9-4943-b705-ec411a8df311",
   "metadata": {},
   "source": [
    "### AsyncMilvusClient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0316a559-1804-4833-a4c2-523923678252",
   "metadata": {},
   "outputs": [],
   "source": [
    "# AsyncMilvusClient 是 pymilvus 2.5.x 新增的异步客户端，支持异步编程，适合高并发场景。\n",
    "# 功能与 MilvusClient 类似，但使用 await 关键字"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python3.10",
   "language": "python",
   "name": "py310"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
